{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Train mobilenet image classifier\n",
    "Currenly uses mobilenet V2 to train an image classifier.\n",
    "\n",
    "Future work:  \n",
    "[] support different deep learning neural network architectures (like VGG, ResNet, ...)  \n",
    "[] expose hyper parameters of those architectures as input to this component  \n",
    "[] support hyperparameter tuning (ideally using Katib)  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 2.413514,
     "end_time": "2021-01-28T15:59:34.938111",
     "exception": false,
     "start_time": "2021-01-28T15:59:32.524597",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "#os.environ['create_image']='True'\n",
    "os.environ['repository']='romeokienzler'\n",
    "os.environ['version']='0.15'\n",
    "#\n",
    "#os.environ['install_requirements']='True'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": true,
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Sending build context to Docker daemon  96.26kB\n",
      "Step 1/3 : FROM registry.access.redhat.com/ubi8/python-39\n",
      " ---> a531ae76755e\n",
      "Step 2/3 : RUN pip install ipython nbformat tensorflow==2.9.1\n",
      " ---> Using cache\n",
      " ---> ebc26374c664\n",
      "Step 3/3 : ADD train-mobilenet_v2.ipynb /\n",
      " ---> 0a65716cf0cf\n",
      "Successfully built 0a65716cf0cf\n",
      "Successfully tagged claimed-train-mobilenet_v2:0.15\n",
      "The push refers to repository [docker.io/romeokienzler/claimed-train-mobilenet_v2]\n",
      "\n",
      "\u001b[1B5f91aec9: Preparing \n",
      "\u001b[1B7d7d69b5: Preparing \n",
      "\u001b[1B2e0f4ef5: Preparing \n",
      "\u001b[1B276847a2: Preparing \n",
      "\u001b[1B534f4e1b: Preparing \n",
      "\u001b[1Bc926eef9: Preparing \n",
      "\u001b[7B5f91aec9: Pushed lready exists 3kBA\u001b[2K\u001b[7A\u001b[2K\u001b[1A\u001b[2K\u001b[7A\u001b[2K0.15: digest: sha256:a7233cfea658b1c5babd6d65501de723e15d4d10f9f721a1929bf65007988e21 size: 1795\n"
     ]
    },
    {
     "ename": "SystemExit",
     "evalue": "0",
     "output_type": "error",
     "traceback": [
      "An exception has occurred, use %tb to see the full traceback.\n",
      "\u001b[0;31mSystemExit\u001b[0m\u001b[0;31m:\u001b[0m 0\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/romeokienzler/.local/lib/python3.9/site-packages/IPython/core/interactiveshell.py:3406: UserWarning: To exit: use 'exit', 'quit', or Ctrl-D.\n",
      "  warn(\"To exit: use 'exit', 'quit', or Ctrl-D.\", stacklevel=1)\n"
     ]
    }
   ],
   "source": [
    "if bool(os.environ.get('create_image',False)):\n",
    "\n",
    "    docker_file=\"\"\"\n",
    "    FROM registry.access.redhat.com/ubi8/python-39\n",
    "    RUN pip install ipython nbformat tensorflow==2.9.1\n",
    "    ADD train-mobilenet_v2.ipynb /\n",
    "    \"\"\"\n",
    "    with open(\"Dockerfile\", \"w\") as text_file:\n",
    "        text_file.write(docker_file)\n",
    "        \n",
    "\n",
    "    !docker build -t claimed-train-mobilenet_v2:`echo $version` .\n",
    "    !docker tag claimed-train-mobilenet_v2:`echo $version` `echo $repository`/claimed-train-mobilenet_v2:`echo $version`\n",
    "    !docker push `echo $repository`/claimed-train-mobilenet_v2:`echo $version`\n",
    "    !rm Dockerfile\n",
    "    sys.exit(0)\n",
    "elif bool(os.environ.get('install_requirements',False)):\n",
    "    !pip install tensorflow==2.9.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import tensorflow as tf\n",
    "# from tensorflow.keras.applications import ResNet50V2 # , MobileNetV3Small\n",
    "from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout\n",
    "from tensorflow.keras import Model\n",
    "from tensorflow import keras\n",
    "import os.path\n",
    "import glob\n",
    "import sys\n",
    "import logging"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# @dependency codait_utils.ipynb\n",
    "# @param image_shape shape the images shall be scaled to and the\n",
    "# model then will accept\n",
    "# @param model zip file name\n",
    "# @param data zip file name\n",
    "# @param model folder name\n",
    "# @param data folder name\n",
    "# @param epochs number of epochs to train\n",
    "# @param checkpoint activate checkpointing\n",
    "# @param checkpoint_ip minio endpoint\n",
    "# @param checkpoint_user minio user\n",
    "# @param checkpoint_pass minio pw\n",
    "# @param checkpoint_bucket minio bucket\n",
    "# @returns model zip file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "image_shape = os.environ.get('image_shape', '400,400')\n",
    "image_folder = os.environ.get('image_folder', 'images')\n",
    "model_folder = os.environ.get('model_folder', 'model')\n",
    "batch_size = int(os.environ.get('batch_size', 32))\n",
    "optimizer = os.environ.get('optimizer', 'adam')\n",
    "loss = os.environ.get('loss', 'categorical_crossentropy')\n",
    "data_dir = os.environ.get('data_dir', '.')\n",
    "epochs = int(os.environ.get('epochs', 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "parameters = list(\n",
    "    map(lambda s: re.sub('$', '\"', s),\n",
    "        map(\n",
    "            lambda s: s.replace('=', '=\"'),\n",
    "            filter(\n",
    "                lambda s: s.find('=') > -1 and bool(re.match(r'[A-Za-z0-9_]*=[.\\/A-Za-z0-9]*', s)),\n",
    "                sys.argv\n",
    "            )\n",
    "    )))\n",
    "\n",
    "\n",
    "for parameter in parameters:\n",
    "    exec(parameter)\n",
    "    logging.warning('Parameter: ' + parameter)\n",
    "\n",
    "batch_size = int(batch_size)\n",
    "\n",
    "\n",
    "for parameter in parameters:\n",
    "    exec(\"logging.warning('final parameter: ' + str({}))\".format(parameter.split('=')[0]))\n",
    "    exec(\"logging.warning('final parameter type: ' + str(type({})))\".format(parameter.split('=')[0]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#data_dir = '../../data/'\n",
    "#batch_size = 4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.822596,
     "end_time": "2021-01-28T15:59:38.817009",
     "exception": false,
     "start_time": "2021-01-28T15:59:37.994413",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "folder = glob.glob(data_dir + image_folder + \"/*\")\n",
    "num_classes = len(folder)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.127534,
     "end_time": "2021-01-28T15:59:38.952236",
     "exception": false,
     "start_time": "2021-01-28T15:59:38.824702",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "input_shape = 'dummy'  # make the compiler happy\n",
    "exec('input_shape = (' + image_shape + ')')\n",
    "\n",
    "train_ds = tf.keras.preprocessing.image_dataset_from_directory(\n",
    "    data_dir + image_folder,\n",
    "    validation_split=0.2,\n",
    "    subset=\"training\",\n",
    "    seed=123,\n",
    "    image_size=input_shape,\n",
    "    batch_size=batch_size)\n",
    "\n",
    "val_ds = tf.keras.preprocessing.image_dataset_from_directory(\n",
    "    data_dir + image_folder,\n",
    "    validation_split=0.2,\n",
    "    subset=\"validation\",\n",
    "    seed=123,\n",
    "    image_size=input_shape,\n",
    "    batch_size=batch_size)\n",
    "\n",
    "train_ds = train_ds.map(lambda x, y: (x, tf.one_hot(y, depth=num_classes)))\n",
    "val_ds = val_ds.map(lambda x, y: (x, tf.one_hot(y, depth=num_classes)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.014987,
     "end_time": "2021-01-28T15:59:38.975558",
     "exception": false,
     "start_time": "2021-01-28T15:59:38.960571",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "def my_net(model, num_classes, freeze_layers=10, full_freeze='N'):\n",
    "    x = model.output\n",
    "    x = GlobalAveragePooling2D()(x)\n",
    "    x = Dense(512, activation='relu')(x)\n",
    "    x = Dropout(0.5)(x)\n",
    "    x = Dense(512, activation='relu')(x)\n",
    "    x = Dropout(0.5)(x)\n",
    "    out = Dense(num_classes, activation='softmax')(x)\n",
    "    model_final = Model(model.input, out)\n",
    "    if full_freeze != 'N':\n",
    "        for layer in model.layers[0:freeze_layers]:\n",
    "            layer.trainable = False\n",
    "    return model_final"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.015812,
     "end_time": "2021-01-28T15:59:38.999942",
     "exception": false,
     "start_time": "2021-01-28T15:59:38.984130",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# model = ResNet50V2(weights='imagenet',include_top=False)\n",
    "# model = my_net(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 5.130591,
     "end_time": "2021-01-28T15:59:44.139230",
     "exception": false,
     "start_time": "2021-01-28T15:59:39.008639",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "exec('input_shape = (' + image_shape + ',3)')\n",
    "\n",
    "model = tf.keras.applications.MobileNetV2(\n",
    "    input_shape=input_shape, alpha=1.0, include_top=False,\n",
    "    input_tensor=None, pooling=None, classes=num_classes,\n",
    "    classifier_activation='softmax'\n",
    ")\n",
    "model = my_net(model, num_classes=num_classes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.031121,
     "end_time": "2021-01-28T15:59:44.197588",
     "exception": false,
     "start_time": "2021-01-28T15:59:44.166467",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# model = tf.keras.applications.VGG16(\n",
    "#     include_top=True, weights=None, input_tensor=None,\n",
    "#     input_shape=(244, 244, 3), pooling=None, classes=2,\n",
    "#     classifier_activation='softmax'\n",
    "# )\n",
    "# model = my_net(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 0.040951,
     "end_time": "2021-01-28T15:59:44.261989",
     "exception": false,
     "start_time": "2021-01-28T15:59:44.221038",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "logging.warning('compiling model')\n",
    "\n",
    "\n",
    "model.compile(\n",
    "    optimizer=optimizer,\n",
    "    loss=loss,\n",
    "    metrics=['accuracy']\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MetricLogger(keras.callbacks.Callback):\n",
    "    def on_train_end(self, logs=None):\n",
    "        logging.warning('Train-accuracy={}'.format(logs['accuracy']))\n",
    "        logging.warning('Validation-accuracy={}'.format(logs['val_accuracy']))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "papermill": {
     "duration": 21.224432,
     "end_time": "2021-01-28T16:00:05.511392",
     "exception": false,
     "start_time": "2021-01-28T15:59:44.286960",
     "status": "completed"
    },
    "scrolled": true,
    "tags": []
   },
   "outputs": [],
   "source": [
    "logging.warning('training model')\n",
    "\n",
    "\n",
    "model.fit(\n",
    "    train_ds,\n",
    "    batch_size=batch_size,\n",
    "    epochs=epochs,\n",
    "    validation_data=val_ds,\n",
    "    callbacks=[MetricLogger()]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "logging.warning('saving model')\n",
    "\n",
    "model.save(data_dir + model_folder)\n",
    "\n",
    "logging.warning('done, exiting')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 55.042719,
   "end_time": "2021-01-28T16:00:26.871724",
   "environment_variables": {},
   "exception": null,
   "input_path": "/home/jovyan/work/elyra-classification/train-trusted-ai.ipynb",
   "output_path": "/home/jovyan/work/elyra-classification/train-trusted-ai.ipynb",
   "parameters": {},
   "start_time": "2021-01-28T15:59:31.829005",
   "version": "2.2.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
